rm(list = ls())
library(haven)
library(lubridate)
library(tidyverse)
library(foreign)
library(ggplot2)
library(ggthemes)
library(estimatr)

# Load the Jordan survey extract that every later section mutates in place.
df = readRDS("data/jordan.rds")
gc()

# The data viewer is only useful (and only reliably available) in an
# interactive session; skip it when the script is run in batch mode.
if (interactive()) {
  df %>% View
}

# Project helpers. PCA_extract() is used throughout this script and is
# presumably defined in pca_fun.R -- confirm.
source("functions/pca_fun.R")
source("functions/effect_plotter2.R")


# safety syria------------------------------------------------------------------
# Perceived safety in the place of origin, coded 0 (very dangerous) to 3 (very
# safe). orig_safe0 keeps the raw recode (NAs intact) for the descriptive
# tables; orig_safe is mean-imputed so it can enter the PCA.
df$orig_safe = df$orig_safe0 = recode(df$`q48.Howwouldyoudescribetherisktocivilians’physicalsafetysuchas`, 
                            "a. Very dangerous" = 0, "b. Somewhat dangerous" = 1, "c. Somewhat safe" = 2, 
                            "d. Very safe" = 3, .default = NA_real_)
df$orig_safe[is.na(df$orig_safe)] = mean(df$orig_safe, na.rm = TRUE)

# Left Syria because of violence (1) or not (0); reversed so higher = safer.
df$q128.WhatwerethemainreasonsforyoutoleaveyourhomecountrySyriaa.Vi %>% table(useNA = "always")
df$left_violence = recode(df$q128.WhatwerethemainreasonsforyoutoleaveyourhomecountrySyriaa.Vi, "No" = 0, "Yes" = 1, .default = NA_real_)
df$left_no_violence = 1 - df$left_violence
  
df$head_age = as.numeric(df$q6.Whatisyourage)
df$q4.Gender %>% table(useNA = "always")
# Anything other than "Male" is coded 1; a missing gender stays NA.
df$head_female = ifelse(df$q4.Gender == "Male", 0, 1)

# Conscription exposure: male respondents aged 42 or younger.
# NOTE(review): only the upper age bound is applied here, while the label used
# in the descriptive table reads "male 18-42" -- confirm no lower bound is needed.
df$head_conscription = 0
df$head_conscription[df$head_female == 0 & df$head_age <= 42] = 1

df$head_no_conscription = 1 - df$head_conscription

safety = c("orig_safe", "left_no_violence", "head_no_conscription")
summary(df[,safety])

# Fit the PCA once and reuse the result (the fit was previously recomputed for
# every output). As used here, element [[1]] exposes $loadings and $Vaccounted
# and element [[2]] holds the per-row scores.
# Assumes PCA_extract() is deterministic for identical inputs -- confirm.
pca_safety = PCA_extract(vars = df[, safety], cor_type = "poly")
df$index_safety = pca_safety[[2]] %>% as.vector()
names = pca_safety[[1]]$loadings[,1] %>% names
vals = pca_safety[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
tibble(names, vals)
tibble(names, vals) %>% write.csv("out/jordan/loadings/index_safety_loadings_jo.csv", row.names = F)
pca_safety[[1]]$Vaccounted %>% round(2) %>% write.csv("out/jordan/variance/index_safety_variance_jo.csv")

# econ_well_being_syria ---------------------------------------------------
# Job availability in the place of origin, 0 (very bad) to 3 (very good).
# The raw recode is kept in orig_jobs0; orig_jobs is the mean-imputed,
# z-scored version, which doubles as the single-item economic index.
df$orig_jobs0 = recode(df$q49.Howwouldyoudescribetheavailabilityofjobsatpresentinyourplace, 
                       "a. Very bad" = 0, "b. Somewhat bad:" = 1, "c. Somewhat good:" = 2, 
                       "d. Very good:" = 3, .default = NA_real_)

# Fill missing answers with the observed mean before standardising.
jobs_imputed = replace(df$orig_jobs0, is.na(df$orig_jobs0), mean(df$orig_jobs0, na.rm = T))
df$orig_jobs = (jobs_imputed - mean(jobs_imputed)) / sd(jobs_imputed)

df$index_econ_syria = df$orig_jobs


# services syria ----------------------------------------------------------
# Four perceived-service items for the place of origin. Each keeps its raw
# recode in a "...0" column (used by the descriptive tables) and a mean-imputed
# copy that feeds the PCA.

# Electricity: 0 (none) to 4 (24 hours a day).
df$orig_electricity = df$orig_electricity0 = recode(df$q50.Asfarasyouknowhowmanyhoursperdayisthereelectricityinyourplac, 
                          "e. There is no electricity in my place of origin" = 0,
                          "d. Less often" = 1, 
                          "c. Yes, some electricity every week, but not every day" = 2, 
                          "b. Yes, some electricity each day, but not all day" = 3, 
                          "a. Yes, 24 hours a day" = 4, .default = NA_real_)
df$orig_electricity[is.na(df$orig_electricity)] = mean(df$orig_electricity, na.rm = TRUE)

# Running water: 0 (none) to 4 (24 hours a day).
df$orig_water = df$orig_water0 = recode(df$q51.Asfarasyouknowhowmanyhoursperdayisthererunningwaterinyourpla, 
                    "e. There is no Water in my place of origin" = 0,
                    "d. Less often" = 1, 
                    "c. Yes, some water every week, but not every day" = 2, 
                    "b. Yes, some water each day, but not all day" = 3, 
                    "a. Yes, 24 hours a day" = 4, .default = NA_real_)
df$orig_water[is.na(df$orig_water)] = mean(df$orig_water, na.rm = TRUE)

# Schools operating: 0 (no) / 1 (yes).
df$orig_schools = df$orig_schools0 = recode(df$q52.Asfarasyouknowareschoolsoperatinginyourplaceoforigin, 
                      "b. No" = 0, "a. Yes" = 1, .default = NA_real_)
df$orig_schools[is.na(df$orig_schools)] = mean(df$orig_schools, na.rm = TRUE)

# Health centers operating: 0 (no) / 1 (yes).
df$orig_health = df$orig_health0 = recode(df$q53.Asfarasyouknowarehealthcentersoperatinginyourplaceoforigin, 
                     "b. No" = 0, "a. Yes" = 1, .default = NA_real_)
df$orig_health[is.na(df$orig_health)] = mean(df$orig_health, na.rm = TRUE)


services_syr = c("orig_electricity", "orig_water", "orig_schools", "orig_health")
summary(df[,services_syr])

# Fit the PCA once and reuse it for the scores, loadings and variance table.
# Assumes PCA_extract() is deterministic for identical inputs -- confirm.
pca_services_syr = PCA_extract(vars = df[, services_syr], cor_type = "poly")
df$index_services_syria = pca_services_syr[[2]] %>% as.vector()
names = pca_services_syr[[1]]$loadings[,1] %>% names
vals = pca_services_syr[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
tibble(names, vals) %>% write.csv("out/jordan/loadings/index_services_syr_loadings_jo.csv", row.names = F)
pca_services_syr[[1]]$Vaccounted %>% round(2) %>% write.csv("out/jordan/variance/index_services_syr_variance_jo.csv")



# Networks ----------------------------------------------------------------
# Counts of relatives/friends in Jordan who have gone back. The two source
# columns (q135, q136) share the same truncated name and differ only by number.
df$q136.ApproximatelyhowmanyofyourrelativesorfriendsinJordanhavegon = df$q136.ApproximatelyhowmanyofyourrelativesorfriendsinJordanhavegon %>% as.numeric
df$q135.ApproximatelyhowmanyofyourrelativesorfriendsinJordanhavegon = df$q135.ApproximatelyhowmanyofyourrelativesorfriendsinJordanhavegon %>% as.numeric

# Returned to the place of origin (q136). Some respondents reported hundreds
# of returnees (e.g. 350 to Syria, 100 to the place of origin); cap at 15 to
# stay consistent with our own survey.
df$friend_rtrn_orig = df$q136.ApproximatelyhowmanyofyourrelativesorfriendsinJordanhavegon
df$friend_rtrn_orig[df$friend_rtrn_orig > 15] = 15


# Returned anywhere in Syria: q136 + q135, treating a missing count as 0,
# then capped at 15 for the same reason.
df$friend_rtrn_syr = rowSums(df[,c("q136.ApproximatelyhowmanyofyourrelativesorfriendsinJordanhavegon", "q135.ApproximatelyhowmanyofyourrelativesorfriendsinJordanhavegon")], na.rm = TRUE)
df$friend_rtrn_syr[df$friend_rtrn_syr > 15] = 15



networks_syr = c("friend_rtrn_syr", "friend_rtrn_orig")
summary(df[,networks_syr])

# Count data, hence cor_type = "pear" rather than "poly". Fit once and reuse.
# Assumes PCA_extract() is deterministic for identical inputs -- confirm.
pca_networks_syr = PCA_extract(vars = df[, networks_syr], cor_type = "pear")
df$index_networks_syria = pca_networks_syr[[2]] %>% as.vector()
names = pca_networks_syr[[1]]$loadings[,1] %>% names
vals = pca_networks_syr[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
tibble(names, vals) %>% write.csv("out/jordan/loadings/index_networks_syr_loadings_jo.csv", row.names = F)
pca_networks_syr[[1]]$Vaccounted %>% round(2) %>% write.csv("out/jordan/variance/index_networks_syr_variance_jo.csv")



# information quality -----------------------------------------------------
# Missing is coded 0 because these questions were only asked of respondents
# who said they have access to information about Syria.
df$know_safety = recode(df$q43.Whatdoyouhaveinformationabouta.Safetysecuritynewsa, "Yes" = 1, "No" = 0, .missing = 0)
df$know_infrastructure = recode(df$q43.Whatdoyouhaveinformationaboutc.Statusofinfrastructureinapart, "Yes" = 1, "No" = 0, .missing = 0)
df$know_friends = recode(df$q43.Whatdoyouhaveinformationaboutf.NewsaboutfriendsfamilyinSyria, "Yes" = 1, "No" = 0, .missing = 0)

# Confidence in knowledge about the place of origin, 0 (not at all) to 4 (very).
# Unlike the other origin items, this one is not mean-imputed here, so
# orig_know_confidence and orig_know_confidence0 are identical.
df$orig_know_confidence = df$orig_know_confidence0 = recode(df$q54.Howconfidentareyouinyourknowledgeaboutconditionsinyourplaceo, 
                              "a. Not confident at all" = 0, "b. Somewhat not confident" = 1, 
                              "c. Neither confident nor not confident" = 2, 
                              "d. Somewhat confident" = 3, "e. Very confident" = 4, .default = NA_real_)

info_quality = c("know_safety", "know_infrastructure", "know_friends", "orig_know_confidence")
summary(df[,info_quality])

# Fit the PCA once and reuse it for the scores, loadings and variance table.
# Assumes PCA_extract() is deterministic for identical inputs -- confirm.
pca_info_quality = PCA_extract(vars = df[, info_quality], cor_type = "poly")
df$index_info_quality = pca_info_quality[[2]] %>% as.vector()
names = pca_info_quality[[1]]$loadings[,1] %>% names
vals = pca_info_quality[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
tibble(names, vals) %>% write.csv("out/jordan/loadings/index_info_quality_loadings_jo.csv", row.names = F)
pca_info_quality[[1]]$Vaccounted %>% round(2) %>% write.csv("out/jordan/variance/index_info_quality_variance_jo.csv")



# Well being Jordan -------------------------------------------------------
# Employment: 1 unless the answer is the explicit "No" option; NA stays NA.
df$hh_work = ifelse(df$q239.Isanymemberofyourhouseholdcurrentlyworking == "b. No:لا", 
                    0, 1)
df$head_work = ifelse(df$q240.Areyoucurrentlyworking == "c. No", 0, 1)

# Employed if either the respondent or any household member works.
df$employed = ifelse(df$hh_work == 1 | df$head_work == 1, 1, 0)

df$aid_twomonths = recode(df$q28.Haveyoureceivedassistancefromthem, "Yes" = 1, "No" = 0, .default = NA_real_)

# Change in access to aid over the last year: 0 deteriorated, 1 same, 2 improved.
df$aid_change = recode(df$q34.Ifyouthinkbackaboutthesituationoneyearagointermsofaccesstoas, "a. Improved" = 2, "b. Stayed the same" = 1, "c. Deteriorated" = 0, .default = NA_real_)

df$work_legal = recode(df$q254.Doyouoranyofyourhouseholdhaveaworkpermit, "b. No" = 0, "a. Yes" = 1, .default = NA_real_)

# Explicit .default keeps this recode consistent with its siblings: unmatched
# answers become NA either way, but without relying on recode()'s fallback.
df$food_aid = recode(df$q224.Haveyoureceivedfoodvouchersduringthelastmonth, "b. No" = 0, "a. Yes" = 1, .default = NA_real_)
df$food_aid %>% summary


econ_jordan = c("employed", "aid_twomonths", "aid_change", "work_legal", "food_aid")
summary(df[,econ_jordan])

# Fit the PCA once and reuse it for the scores, loadings and variance table.
# Assumes PCA_extract() is deterministic for identical inputs -- confirm.
pca_econ_jordan = PCA_extract(vars = df[, econ_jordan], cor_type = "poly")
df$index_econ_jordan = pca_econ_jordan[[2]] %>% as.vector()
names = pca_econ_jordan[[1]]$loadings[,1] %>% names
vals = pca_econ_jordan[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
tibble(names, vals) %>% write.csv("out/jordan/loadings/index_econ_jordan_loadings_jo.csv", row.names = F)
pca_econ_jordan[[1]]$Vaccounted %>% round(2) %>% write.csv("out/jordan/variance/index_econ_jordan_variance_jo.csv")



# services jordan ---------------------------------------------------------
# 1 = access to health services has been impacted, 0 = not.
df$health_no_access = recode(df$q87.Hasitimpactedyourabilitytoaccesshealthservices, "Yes" = 1, "No" = 0, 
                          .default = NA_real_)

# 1 if the household reports a positive count for boys or girls in q101,
# 0 otherwise. The former nested ifelse() returned 0 in both of its branches,
# so a single test is equivalent.
df$no_school = ifelse(as.numeric(df$q101a.Boys) > 0 | as.numeric(df$q101b.Girls) > 0, 1, 0)

# Missing counts are treated as "no children out of school".
df$no_school[is.na(df$no_school)] = 0

# Reverse coding so that higher values mean better access.
df$health_access = 1 - df$health_no_access
df$school_access = 1 - df$no_school

services_jordan = c("health_access", "school_access")
summary(df[,services_jordan])

# Fit the PCA once and reuse it for the scores, loadings and variance table.
# Assumes PCA_extract() is deterministic for identical inputs -- confirm.
pca_services_jordan = PCA_extract(vars = df[, services_jordan], cor_type = "poly")
df$index_services_jordan = pca_services_jordan[[2]] %>% as.vector()
names = pca_services_jordan[[1]]$loadings[,1] %>% names
vals = pca_services_jordan[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
tibble(names, vals) %>% write.csv("out/jordan/loadings/index_services_jordan_loadings_jo.csv", row.names = F)
pca_services_jordan[[1]]$Vaccounted %>% round(2) %>% write.csv("out/jordan/variance/index_services_jordan_variance_jo.csv")


# Network Jordan ----------------------------------------------------------
# Three 1-5 ordinal items. An explicit .default makes unmatched answers NA,
# consistent with the other recodes in this file.

# Frequency of sharing a meal with Jordanians: 1 (never) to 5 (almost daily).
df$net_jo1 = recode(df$q180.Inthelast12monthshowoftendidyoushareamealwithJordanianswhoa,
                            "a. Never" = 1, "b. Once a year" = 2, "c. Once a month" = 3, 
                            "d. Once a week" = 4, "e. Almost every day" = 5, .default = NA_real_)

# Jordanian phone contacts (q181): 1 (0) to 5 (15 or more).
df$net_jo2 = recode(df$q181.PleasethinkabouttheJordaniansinyourphonecontacts.Withhowman,
                            "a. 0" = 1, "b. 1 to 2" = 2, "c. 3 to 6" = 3, "d. 7 to 14" = 4, "e. 15 or more" = 5,
                            .default = NA_real_)

# Syrian-in-Jordan phone contacts (q182): same banding.
df$net_jo3 = recode(df$q182.PleasethinkabouttheSyrianinJordaninyourphonecontacts.Withho,
                      "a. 0" = 1, "b. 1 to 2" = 2, "c. 3 to 6" = 3, "d. 7 to 14" = 4, "e. 15 or more" = 5,
                      .default = NA_real_)

networks_jordan = c("net_jo1", "net_jo2", "net_jo3")
summary(df[,networks_jordan])

# Fit the PCA once and reuse it for the scores, loadings and variance table.
# Assumes PCA_extract() is deterministic for identical inputs -- confirm.
pca_networks_jordan = PCA_extract(vars = df[, networks_jordan], cor_type = "poly")
df$index_networks_jordan = pca_networks_jordan[[2]] %>% as.vector()
names = pca_networks_jordan[[1]]$loadings[,1] %>% names
vals = pca_networks_jordan[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
tibble(names, vals) %>% write.csv("out/jordan/loadings/index_networks_jordan_loadings_jo.csv", row.names = F)
pca_networks_jordan[[1]]$Vaccounted %>% round(2) %>% write.csv("out/jordan/variance/index_networks_jordan_variance_jo.csv")


# social well being jordan -----------------------------------------------
# Connection to Jordanian society: 1 (none) to 5 (extremely close).
df$soc_jo1 = recode(df$q178.HowconnecteddoyoufeelwithJordaniansociety, 
                     "a. I feel an extremely close connection" = 5, "b. I feel a very close connection" = 4,
                     "c. I feel a moderately close connection" = 3, "d. I feel a weak connection" = 2,
                     "e. I do not feel a connection at all" = 1, .default = NA_real_)
# Feeling like an outsider, reverse-scored: 1 (always) to 5 (never).
df$soc_jo2 = recode(df$q179.HowoftendoyoufeellikeanoutsiderinJordan, 
                     "a. Never" = 5, "b. Rarely" = 4, "c. Sometimes" = 3, "d. Often" = 2, "e. Always" = 1,
                     .default = NA_real_)

# Education: 1 none, 2 primary, 3 secondary/vocational, 4 university.
# NOTE(review): no "f." option is mapped; any such answer becomes NA -- confirm.
df$educ = recode(df$q256.Whatisyourlevelofeducation, 
                 "a. Illiterate (No Education)" = 1, "b. Primary Level Education" = 2,
                 "c. Secondary Level Education" = 3, "d. University Level Education – Bachelor’s degree" = 4,
                 "e. University Level Education – Master’s degree" = 4, "g. Vocational training" = 3,
                 .default = NA_real_)

# Years since the first family member arrived (relative to 2019): strip the
# leading "x. " option letter, mean-impute, then bin on the 0/33/67/100% quantiles.
df$yrs_in_jo_max = 2019-(df$q129.WhendidthefirstmemberofyourfamilyarrivetoJordan %>% str_remove("^[[:alpha:]]\\.\\s+") %>% as.numeric)
df$yrs_in_jo_max[is.na(df$yrs_in_jo_max)] = mean(df$yrs_in_jo_max, na.rm = TRUE)
df$yrs_in_jo_max = cut(df$yrs_in_jo_max, breaks = quantile(df$yrs_in_jo_max, c(0, .33, .67, 1))) %>% as.numeric
# cut() uses right-closed intervals without include.lowest, so rows equal to
# the minimum fall outside the first bin and come back NA; they are coded 0.
df$yrs_in_jo_max[is.na(df$yrs_in_jo_max)] = 0

df$harass = recode(df$q60.Doesanyoneinyourfamilyfaceverbalorphysicalharassmentmeaningv, "Yes" = 1, "No" = 0,
                .default = NA_real_)

# Relations with neighbors: 0 negative, 1 neither, 2 positive.
df$rln_neighbor = recode(df$q185.Wouldyoudescribetherelationswithyourneighborsasmostlypositi, "c. Mostly negative" = 0, "b. Neither positive nor negative" = 1, "a. Mostly positive" = 2, .default = NA_real_)

df$help_neighbor = recode(df$q187.Haveyoureceivedhelpfromyourneighbors, "b. No" = 0, "a. Yes" = 1, .default = NA_real_)

social_jordan = c("soc_jo1", "soc_jo2", "educ", "yrs_in_jo_max", "harass", "rln_neighbor", "help_neighbor")
summary(df[,social_jordan])

# Fit the PCA once and reuse it for the scores, loadings and variance table.
# Assumes PCA_extract() is deterministic for identical inputs -- confirm.
pca_social_jordan = PCA_extract(vars = df[, social_jordan], cor_type = "poly")
df$index_social_jordan = pca_social_jordan[[2]] %>% as.vector()
names = pca_social_jordan[[1]]$loadings[,1] %>% names
vals = pca_social_jordan[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
tibble(names, vals) %>% write.csv("out/jordan/loadings/index_social_jordan_loadings_jo.csv", row.names = F)
pca_social_jordan[[1]]$Vaccounted %>% round(2) %>% write.csv("out/jordan/variance/index_social_jordan_variance_jo.csv")



# Legal Jordan -----------------------------------------------
# 1 if all family members hold a valid UNHCR registration, 0 if not.
df$unhcr = recode(df$q18.DoallyourfamilymembershaveavalidregistrationwithUNHCR, "Yes" = 1,
               "No" = 0, .default = NA_real_)

# 1 if the respondent holds the Government Service ("MOI") card, 0 if not.
df$moi = recode(df$`q22.DoyouhaveaGovernmentServiceCardcurrentlycalled“MOICard”`, "Yes" = 1, "No" = 0, .default = NA_real_)



legal_jordan = c("unhcr", "moi")
summary(df[,legal_jordan])

# Fit the PCA once and reuse it for the scores, loadings and variance table.
# Assumes PCA_extract() is deterministic for identical inputs -- confirm.
pca_legal_jordan = PCA_extract(vars = df[, legal_jordan], cor_type = "poly")
df$index_legal_jordan = pca_legal_jordan[[2]] %>% as.vector()
names = pca_legal_jordan[[1]]$loadings[,1] %>% names
vals = pca_legal_jordan[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
tibble(names, vals) %>% write.csv("out/jordan/loadings/index_legal_jordan_loadings_jo.csv", row.names = F)
pca_legal_jordan[[1]]$Vaccounted %>% round(2) %>% write.csv("out/jordan/variance/index_legal_jordan_variance_jo.csv")



# demographics and outcome ------------------------------------------------
df$q125.WhichgovernorateinSyriaareyoufrom %>% table

# Covariates. Columns are referenced through mutate()'s data mask rather than
# via df$, so the expressions always read from the data being piped in.
df = df %>% 
  mutate(female = recode(q4.Gender, "Female" = 1, "Male" = 0, .default = NA_real_), 
         age = as.numeric(q6.Whatisyourage),
         hh_size = as.numeric(q7.Howmanymembersareinyourfamily),
         location_jo = factor(CurrentLocation),
         origin_syr = factor(q125.WhichgovernorateinSyriaareyoufrom),
         # %in% never yields NA, so a missing household-head answer is coded 0.
         female_headed = ifelse(q5.Whoistheheadofyourhousehold %in% c("a. Adult woman (18 or older)", "e. Female youth (15 – 24)",
                                                                      "g. Elderly woman (60 and older)"), 1, 0))

# Return intention: 1 yes, 0 maybe, -1 no; anything else is NA.
df$rtrn_ever = recode(df$q139.Doyouthinkitwilleverbepossibletoreturntoyourplaceoforiginin, "a. Yes" = 1, "b. Maybe" = 0, "b. No لا" = -1, .default = NA_real_)
# Binary outcome: 1 only for an explicit "Yes"; "Maybe" and "No" are both 0.
df$rtrn_dv = ifelse(df$rtrn_ever == 1, 1, 0)
# Row identifier; seq_len() stays well-defined even for a zero-row data frame.
df$id = seq_len(nrow(df)) %>% as.factor


# Descriptive stats -------------------------------------------------------
# Variables reported in the descriptive tables. The "...0" columns are the raw
# (non-imputed) recodes created alongside the PCA inputs.
descriptives = c("rtrn_dv", 
                 "female", 
                 "age", 
                 "hh_size", 
                 "location_jo", 
                 "origin_syr", 
                 "female_headed",
                 "orig_safe0",
                 "left_violence",
                 "head_conscription",
                 "orig_jobs0",
                 "orig_electricity0",
                 "orig_water0",
                 "orig_schools0",
                 "orig_health0",
                 "friend_rtrn_orig",
                 "friend_rtrn_syr",
                 "know_safety",
                 "know_infrastructure",
                 "know_friends",
                 "orig_know_confidence0",
                 "employed",
                 "aid_twomonths",
                 "aid_change",
                 "work_legal",
                 "food_aid",
                 "health_no_access",
                 "no_school",
                 "net_jo1",
                 "net_jo2",
                 "net_jo3",
                 "soc_jo1",
                 "soc_jo2",
                 "educ",
                 "yrs_in_jo_max",
                 "harass",
                 "rln_neighbor",
                 "help_neighbor",
                 "unhcr",
                 "moi")

# One row per variable (transpose = TRUE) with the requested statistics.
x = summarytools::descr(df[,descriptives], transpose = TRUE, stats = c("mean", "sd", "min", "med", "max", "n.valid", "pct.valid"), round.digits = 2)
x$N.Valid = NULL
x$Pct.Missing = 100 - x$Pct.Valid
x$Pct.Valid = NULL
# Keep the variable names as a column: as_tibble() drops row names.
x$Variable2 = row.names(x)
x = x %>% as_tibble

x = x %>% select(Variable2, Mean, everything())
# Diagnostic: non-zero missing shares that would display as 0.00 after rounding.
x$Pct.Missing[x$Pct.Missing < 0.005 & x$Pct.Missing > 0]
x[,2:ncol(x)] = x[,2:ncol(x)] %>% round(2)
# sprintf() is vectorised, so no sapply() wrapper is needed. The former call
# also forwarded a stray `how = "replace"` argument to sprintf(), which the
# format string never consumed.
x$Pct.Missing = sprintf("%.2f", x$Pct.Missing)

x$Pct.Missing = paste0(x$Pct.Missing, "%")

x %>% print(n = 50)

# Attach a human-readable label to every row of the descriptive table.
# recode() leaves any Variable2 value without an entry below unchanged.
# NOTE(review): yrs_in_jo_max is a quantile-bin code by the time it reaches
# this table, while its label reads as a count of years -- confirm the label.
x = x %>%
  mutate(Variable = recode(Variable2, 
                           "age" = "Age (Years)",
                           "aid_change" = "Aid change in last year (0: deteriorated, 1:same, 2: improved)",
                           "aid_twomonths" = "Received aid in last 2 months",
                           "educ" = "Education Level (1: no educ, 2: primary, 3: secondary/vocational, 4: higher)",
                           "employed" = "Household member employed",
                           "female" = "Female",
                           "female_headed" = "Female head of household",
                           "food_aid" = "Received food voucher in last month",
                           "friend_rtrn_orig" = "No. relatives/friends returning to origin (censored at 15)",
                           "friend_rtrn_syr" = "No. relatives/friends returning to Syria (censored at 15)",
                           "harass" = "Household member reported harassment in local area",
                           "head_conscription" = "Eligible for conscription (male 18-42)",
                           "health_no_access" = "Lack access to healthcare",
                           "help_neighbor" = "Received help from neighbors",
                           "hh_size" = "Household size",
                           "know_friends" = "Know about friends and family in Syria",
                           "know_infrastructure" = "Know about status of infrastructure in Syria",
                           "know_safety" = "Know about safety and security in Syria",
                           "left_violence" = "Left Syria because of violence",
                           "moi" = "Possess Ministry of Interior ID",
                           "net_jo1" = "Share meal with Jordanians (1: Never, 5: Almost daily)",
                           "net_jo2" = "Conversations with Jordanians in past week (1: 0, 5: 15 or more)",
                           "net_jo3" = "Conversations with Syrians in Jordan in past week (1: 0, 5: 15 or more)",
                           "no_school" = "Lack access to school",
                           "orig_electricity0" = "Hours of electricity in origin (0: none, 4: 24 hours a day)",
                           "orig_health0" = "Are health centers operating in origin?",
                           "orig_jobs0" = "Availability of jobs in origin (0: very bad, 3: very good)",
                           "orig_know_confidence0" = "Confidence in info about origin (0: not at all, 4: very)",
                           "orig_safe0" = "Safety in origin (0: very dangerous, 3: very safe)",
                           "orig_schools0" = "Are schools operating in origin?",
                           "orig_water0" = "Hours of water in origin (0: none, 4: 24 hours a day)",
                           "rln_neighbor" = "Relation with neighbors (0: negative, 1: neither, 2: positive)",
                           "rtrn_dv" = "Plan to ever return to Syria",
                           "soc_jo1" = "Connection to Jordanian society (1: not at all, 5: extremely close)",
                           "soc_jo2" = "Feel like an outsider (1: always, 5: never)",
                           "unhcr" = "All household members registered with UNHCR",
                           "work_legal" = "Any household member has work permit",
                           "yrs_in_jo_max" = "No. years since first household member arrived in Jordan"
                           ))
# Put the label first; Variable2 stays for the filters that follow.
x = x %>% select(Variable, everything())

# Write one group of descriptive rows as a bare LaTeX tabular.
#
# summary_tbl: the labelled descriptive table (needs a Variable2 column).
# keep_vars:   Variable2 values to include.
# file:        path of the .tex file to write.
#
# xtable::print.xtable() prints the full table environment and returns it
# invisibly as a string; the substr() call trims that string to the span
# starting 2 characters after "centering" and ending 10 characters before the
# end of the "end{table" match, and that span is what gets written.
# Returns the filtered rows invisibly.
export_summary_tex = function(summary_tbl, keep_vars, file) {
  rows = summary_tbl %>% filter(Variable2 %in% keep_vars) %>% select(-Variable2)
  tex = rows %>% xtable::xtable() %>% xtable::print.xtable(include.rownames = F)
  tex = substr(tex, str_locate(tex, "centering")[2]+2, str_locate(tex, "end\\{table")[2]-10)
  cat(tex, file = file)
  invisible(rows)
}

# Demographics and outcome. Names without a row in x are simply not matched
# by the filter.
demographic_summary = export_summary_tex(
  x,
  c("rtrn_dv",
    "female",
    "age",
    "hh_size",
    "location_jo",
    "origin_syr",
    "female_headed"),
  "out/descriptive/summary_dem_jordan.tex"
)


# Conditions in Syria (pull factors) and information items.
syria_summary = export_summary_tex(
  x,
  c("orig_safe0",
    "left_violence",
    "head_conscription",
    "orig_jobs0",
    "orig_electricity0",
    "orig_water0",
    "orig_schools0",
    "orig_health0",
    "friend_rtrn_orig",
    "friend_rtrn_syr",
    "know_safety",
    "know_infrastructure",
    "know_friends",
    "orig_know_confidence0"),
  "out/descriptive/summary_pull_jordan.tex"
)


# Conditions in Jordan (push factors).
jordan_summary = export_summary_tex(
  x,
  c("employed",
    "aid_twomonths",
    "aid_change",
    "work_legal",
    "food_aid",
    "health_no_access",
    "no_school",
    "net_jo1",
    "net_jo2",
    "net_jo3",
    "soc_jo1",
    "soc_jo2",
    "educ",
    "yrs_in_jo_max",
    "harass",
    "rln_neighbor",
    "help_neighbor",
    "unhcr",
    "moi"),
  "out/descriptive/summary_push_jordan.tex"
)



# Plot --------------------------------------------------------------------
# All constructed indices (columns whose name starts with "index") plus the id.
indices = df %>% 
  select("id", c(names(df)[str_detect(names(df), "^index")]))

# Collapse governorate of origin: keep the four named governorates and pool
# everything else into "Other".
df$origin_syr2 = df$origin_syr %>% as.character()
df$origin_syr2[!str_detect(df$origin_syr2, "Aleppo|Damascus|Dar'a|Homs")] = "Other"
df$origin_syr2 = factor(df$origin_syr2)

covars = c("female", "age", "hh_size", "location_jo", "origin_syr2", "educ", "female_headed")


# Every index column is a predictor; "id" is not.
predictors = setdiff(names(indices), "id")

# One robust regression per index: rtrn_dv ~ <index> + covariates. The tidied
# results are collected in a list instead of numbered reg1, reg2, ... globals,
# which also keeps the code valid when there is only a single predictor.
reg_results = lapply(predictors, function(predictor) {
  rhs = paste(c(predictor, covars), collapse = " + ")
  lm_robust(as.formula(paste("rtrn_dv", rhs, sep = " ~ ")), data = df) %>% tidy
})

# Stack the results and keep only the coefficient on the index itself.
df_plot = bind_rows(reg_results) %>% filter(term %in% names(indices))

# Shape the coefficient table for plotting: add section-heading and spacer
# rows, fix the display order through factor levels, and swap the index names
# for display labels.
df_plot = df_plot %>% 
  # Coefficient rows use a plain axis label ...
  mutate(bold = "plain") %>% 
  # ... while three heading rows are bold. The three whitespace-only terms
  # (" ", "  ", "   ") are distinct values used as spacer rows.
  bind_rows(
    tibble(term = c("Pull factors in Syria", "Push factors in Jordan", "Information", " ", "  ", "   "), 
           bold = c(rep("bold", 3), rep("plain", 3)))
  ) %>% 
  # Level order = display order: heading, its indices, spacer; repeated for
  # each of the three groups.
  mutate(term = factor(term, levels = c("Pull factors in Syria", "index_safety", "index_econ_syria", "index_services_syria", 
                                        "index_networks_syria", " ", "Push factors in Jordan", "index_econ_jordan",
                                        "index_services_jordan", "index_networks_jordan", "index_social_jordan", "index_legal_jordan", "  ", "Information",
                                        "index_info_quality", "   "))) %>% 
  # Jordan labels carry a trailing space so they stay distinct factor levels
  # from the identically worded Syria labels.
  mutate(term = recode(term, "index_econ_jordan" = "Economic well-being ",
                       "index_social_jordan" = "Social well-being ",
                       "index_services_jordan" = "Services ",
                       "index_legal_jordan" = "Legal conditions ",
                       "index_safety" = "Safety",
                       "index_econ_syria" = "Economic well-being",
                       "index_services_syria" = "Services",
                       "index_networks_syria" = "Networks",
                       "index_networks_jordan" = "Networks ",
                       "index_info_quality" = "Confidence in Information")) %>% 
  # Reverse the levels (the plot flips its coordinates).
  mutate(term = fct_rev(term)) %>% 
  # 90% interval bounds from the t quantile. Inside mutate(), `df` resolves to
  # a df_plot column of that name if one exists -- presumably the degrees of
  # freedom returned by tidy(); otherwise it would pick up the global survey
  # data frame. TODO confirm.
  mutate(outcome = recode(outcome, "rtrn_dv" = "Return ever"),
         outcome = factor(outcome, levels = c("Return ever")),
         conf.low90 = estimate - qt(.95, df) * std.error,
         conf.high90 = estimate + qt(.95, df) * std.error,
  )

# desc() on the reversed factor restores the original level order. The colour
# groups are then assigned by position (6 + 7 + 3 rows), so this line assumes
# all 16 rows are present in exactly that order.
df_plot = df_plot %>% arrange(desc(term)) %>% mutate(attributes = c(rep("A", 6), rep("B", 7), rep("C", 3)))

df_plot

# LaTeX table of the per-index coefficients with unadjusted and
# Benjamini-Hochberg adjusted p-values.
# Filtering on outcome drops the heading/spacer rows (their outcome is NA) and
# leaves the index rows in df_plot's order: 4 Syria, 5 Jordan, 1 information.
# The country suffixes below are assigned by position and rely on that order.
# The Jordan labels already end in a space, hence no leading space in "Jordan".
# NOTE(review): the caption says "government fixed effects in Jordan";
# "governorate" may have been intended -- confirm before publishing.
df_plot %>% 
  filter(outcome == "Return ever") %>% 
  mutate(BH.adjusted.p = stats::p.adjust(p.value, method = "BH")) %>% 
  mutate(predictor = as.character(term)) %>% 
  mutate(suffix = c(rep(" Syria", 4),
                    rep("Jordan", 5),
                    rep("", 1)
  ),
  predictor = paste0(predictor, suffix)) %>% 
  rename(unadjusted.p = p.value) %>% 
  select(predictor, estimate, std.error, unadjusted.p, BH.adjusted.p) %>% 
  rename(Predictor = predictor, Estimate = estimate, Std.Error = std.error, Unadjusted.p = unadjusted.p) %>% 
  xtable::xtable(digits = 3, label = "tab:ever_individual", 
                 caption = "Regression results for the predictors of intentions to ever return (using an individual index in each regression) among Syrians in Jordan. 
                 The regression includes controls, government fixed effects in Jordan, and governorate of origin fixed effects in Syria. 
               The table presents unadjusted p-values and Benjamini-Hochberg p-values.") %>% 
  xtable::print.xtable(table.placement = "H", include.rownames = F, file = "tables/jordan.tex")


library(scales)
# Four default ggplot hues; only the last three are used, one per group.
hex <- hue_pal()(4)

# Theme is assembled up front so the plot expression below stays short.
# axis.text.y takes the per-row face ("bold" for headings) in reversed order.
rtrn_theme <- theme_economist(base_size = 10, base_family = "") +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = .5),
    legend.position = "none",
    axis.ticks.x = element_blank(),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10, angle = 90, vjust = .01, hjust = .1),
    axis.text.x = element_text(size = 10, hjust = .5, vjust = 1, colour = "black"),
    axis.text.y = element_text(size = 10, face = rev(df_plot$bold))
  )

# Coefficient plot: zero reference line, the 90% interval, then the
# conf.low/conf.high interval drawn semi-transparently on top.
ggplot(df_plot, aes(x = term, y = estimate, colour = attributes)) +
  geom_hline(yintercept = 0, size = .5, colour = "darkgrey", linetype = 1) +
  geom_pointrange(
    aes(ymin = conf.low90, ymax = conf.high90),
    position = position_dodge(width = .7),
    size = 0.5
  ) +
  geom_pointrange(
    aes(ymin = conf.low, ymax = conf.high, alpha = I(0.65)),
    position = position_dodge(width = .7),
    size = 0.5
  ) +
  scale_color_manual(values = hex[2:4], guide = "none") +
  labs(x = "", y = "Relationship between predictors\nand return intentions", title = "Return Ever") +
  coord_flip(ylim = c(-0.04, 0.04)) +
  rtrn_theme

# No plot argument, so ggsave() writes the most recently built plot.
ggsave("figures/jordan_rtrn_ever.png", height = 6, width = 6)
